#
# simulatePolarization.R
#
# Power simulation of model ability to detect polarization.
#
# Hill, Seth J. and Chris Tausanovitch. "A Disconnect in Representation? Comparison of Trends in Congressional and Public Polarization."
#

rm(list=ls())
if (.Platform$OS == "windows") {
  # Set working directory in location of script.
  .doit <- function() { # only works with R.exe; trap errors if using Rscript.exe
  frame_files <- lapply(sys.frames(), function(x) x$ofile);frame_files <- Filter(Negate(is.null), frame_files)
  PATH <- dirname(frame_files[[length(frame_files)]]); setwd(PATH) ; rm(PATH,frame_files)
  }
  try(.doit(),silent=T)
}

generateData <- function(generateIdeals,generateResponses,...) {
  # Function to generate observed response data.
  
  # Generate ideal points.
  x <- generateIdeals(...)
  
  # Generate response matrix.
  y <- generateResponses(x=x,...)
  
  return(list("x"=x,"y"=y))
}

genNormalIdeals <- function(n,mu=0,sigma=1,...) {
  # Generate n ideal points from a mu, sigma normal distribution.
  rnorm(n,mu,sigma)
}

genMultipleNormalIdeals <- function(n,mu=0,sigma=1,...) {
  # Generate samples from multiple normals.
  # Arguments are each lists for size n, mean mu, and sd sigma.
  x <- NULL
  for (i in 1:length(n)) {
    x <- c(x,rnorm(n[i],mu[i],sigma[i]))
  }
  x
}

genMultiNomResponses <- function(x,cats,alpha.mat,beta.mat,...) {
  # Generate stochastic multinomial responses consistent with IRT model.
  # Arguments.
  #  x - N-vector of ideal points.
  #  cats - K-vector of the number of response categories for each question.
  #  alpha.mat - K-by-max(cats) matrix of difficulty parameters for each
  #              question; first column could be all 0 for identification;
  #              for questions with < max(cats) categories, final columns are ignored.
  #  beta.mat  - K-by-max(cats) matrix of discrimination parameters for each
  #              question; first column could be all 0 for identification;
  #              for questions with < max(cats) categories, final columns are ignored.
  # Returns N-by-K matrix of responses.
  
  N <- length(x) ; K <- length(cats)
  res <- matrix(NA,nrow=N,ncol=K) # result matrix.
  # Simulate responses for each question.
  for (k in 1:K) {
    # Create matrix of probabilities.
    probs <- matrix(NA,nrow=N,ncol=cats[k]) # probabilities matrix for item k.
    for (i in 1:N) {
      for (j in 1:cats[k]) {
        probs[i,j] <- beta.mat[k,j]*x[i] - alpha.mat[k,j]
      }
    }
    # Transform to probabilities.
    probs <- exp(probs)
    probs <- t(apply(probs,1,function(x) x/sum(x)))
    # Sample responses probabilistically.
    res[,k] <- apply(probs,1,function(x) sample(1:length(x),size=1,prob=x))
  }
  res
}

normalizeMultinomial <- function(output,n,m,K){
  # A function to identify the results of running
  # the model
  bmin <- min(grep("beta",colnames(output)))
  bmax <- max(grep("beta",colnames(output)))
  amin <- min(grep("alpha",colnames(output)))
  amax <- max(grep("alpha",colnames(output)))
  xmin <- min(grep("x",colnames(output)))
  xmax <- max(grep("x",colnames(output)))

  x <- output[,xmin:xmax]
  alpha <- output[,amin:amax]
  beta <- output[,bmin:bmax]
   
  the999s <- which(alpha==999)

  for (i in 1:dim(output)[1]){
    sdx <- sd(x[i,])
    meanx <- mean(x[i,])
    x[i,] <- (x[i,] - meanx)/sdx
    alpha[i,] <- alpha[i,] - beta[i,]*meanx
    beta[i,] <- beta[i,]*sdx
  }
  alpha[the999s] <- NA
  beta[the999s] <- NA

  # set the polarity
  polaritybeta <- 11 # HARD-CODED: beta[1,2] sets the polarity
  polarity <- ifelse(colMeans(beta)[polaritybeta] > 0 ,1,-1)
  x <- x*polarity
  
  return(list(x=x))#,beta=beta,alpha=alpha))
}

estimateModel <- function(responses,K,loc="",adapt=100,iterations=1000,thin=1,...) {
  # Function to estimate the model as a function of a response matrix
  # and other arguments. Argument loc is directory location of multinomial.jags.
  library(rjags)
  
  # Prepare response matrix to be passed to JAGS
  jdata <- list(y=responses,n=dim(responses)[1],m=dim(responses)[2],K=K)
  
  # Tell JAGS not to bother estimating responses that don't exist
  placeholders <- list(alpha=matrix(999,jdata$m,max(K)),beta=matrix(999,jdata$m,max(K)))
  for (i in 1:jdata$m){
    for (j in 1:K[i]){
      placeholders$alpha[i,j] <- NA
      placeholders$beta[i,j] <-NA
    }
  }
  jdata$beta <- placeholders$beta
  jdata$alpha <- placeholders$alpha

  # prepare the model
  model <- jags.model(file=paste(loc,"multinomial.jags",sep=""),n.adapt=adapt,data=jdata)
  # run it
  output <- coda.samples(model, variable.names=c("x","beta","alpha"),
                    n.iter=iterations,thin=thin)
  # identification
  output <- normalizeMultinomial(output[[1]],jdata$n,jdata$m,jdata$K)
  
  return(output)
}

runSim <- function(generateIdeals,generateResponses,...) {
  # Function to implement one simulation as a function of parameters.
  
  # First, generate data.
  cat("Generating data for simulation...\n")
  dat <- generateData(generateIdeals,generateResponses,...)
  # Second, estimate model.
  output <- estimateModel(responses=dat$y,K=cats,...)
  # Return both.
  return(list("dat"=dat,"output"=output))
}

if (F) {
  #
  # Example.
  #
  cats <- sample(c(3,4,5),size=10,replace=T)
  alpha.mat <- matrix(runif(length(cats)*max(cats)),nrow=length(cats),ncol=max(cats))
  beta.mat <- matrix(runif(length(cats)*max(cats)),nrow=length(cats),ncol=max(cats))
  dat <- generateData(generateIdeals=genMultipleNormalIdeals,generateResponses=genMultiNomResponses,
                n=c(500,500),mu=c(0,0),sigma=c(1,1.1),cats=cats,alpha.mat=alpha.mat,beta.mat=beta.mat)
  output <- estimateModel(responses=dat$y,K=cats,adapt=100,iterations=100,thin=1)
  # Check.
  plot(dat$x,colMeans(output$x))
  plot(alpha.mat,colMeans(output$alpha))
  plot(beta.mat,colMeans(output$beta))
  # Alternatively:
  #res <- runSim(generateIdeals=genMultipleNormalIdeals,generateResponses=genMultiNomResponses,
  #              n=c(500,500),mu=c(0,0),sigma=c(1,1.1),cats=cats,alpha.mat=alpha.mat,beta.mat=beta.mat,
  #              adapt=100,iterations=100,thin=1)
  stop()
}

#
# Implementation.
#
# Call in item parameter estimates from full joint multinomial run on ANES 1984-2008 for 10 items.
load("forSeth.RData")

if (T) {
  the.date <- Sys.time()
  set.seed(as.numeric(the.date)) # so different sims start differently.
  N.sims <- 10
  # What N to use?
  if (F) {
    years <- c(1984, 1986, 1988, 1990, 1992, 1994, 1996, 2000, 2004, 2008)
    library(foreign)
    data <- read.dta("../anes_cdf.dta")
    table(data[data$VCF0004 %in% years,"VCF0004"])
    joe <- table(data[data$VCF0004 %in% years,"VCF0004"])
  }

  n <- c(2000,2000) # sample size of each separate ideal point distribution.
  mu <- c(0,0) # means of each ideal point distribution.
  sigma <- c(1,1.1) # standard deviations of each ideal point distribution.
  for (i in 1:N.sims) {
    bigRes <- runSim(generateIdeals=genMultipleNormalIdeals,generateResponses=genMultiNomResponses,
                n=n,mu=mu,sigma=sigma,cats=cats,alpha.mat=alpha.mat,beta.mat=beta.mat,
                adapt=10000,iterations=5000,thin=2)
    finish.date <- Sys.time()
  save(the.date,finish.date,bigRes,
       file=sprintf("sample_output/simulatePolarization-%s-%i.RData",format(the.date, "%a %b %d %Y %Z %H:%M:%OS6"),i))
  }
  # Save result.
}

